In [106]:
%matplotlib inline
import pandas as pd
import numpy as np
import random as rnd
import seaborn as sns
import matplotlib.pyplot as plt

Importar datos de entreno


In [109]:
data = pd.read_csv('train.csv', header=None ,delimiter=";")
feature_names = ['usuario', 'palabra', 'palabraLeida', 'tiempoCaracter',
       'hayErrPalabra', 'tiempoErrPalabra', 'numPalabra','tiempoPalabra', 'tamPalabra', 'caracter',
       'falloCaracter', 'palabraCorrecta']
data.columns = feature_names

Importar datos para predecir


In [110]:
predict = pd.read_csv('predict.csv', header=None ,delimiter=";")
feature_names = ['usuario', 'palabra', 'palabraLeida', 'tiempoCaracter',
       'hayErrPalabra', 'tiempoErrPalabra', 'numPalabra','tiempoPalabra', 'tamPalabra', 'caracter',
       'falloCaracter', 'palabraCorrecta']
predict.columns = feature_names

In [111]:
data[data['caracter'] == 'Z']


Out[111]:
usuario palabra palabraLeida tiempoCaracter hayErrPalabra tiempoErrPalabra numPalabra tiempoPalabra tamPalabra caracter falloCaracter palabraCorrecta
0 Cristhian ZANAHORIABAJA Z 2.113 False 0.0 0 0.0 13 Z False False
17 Jesus ZANAHORIABAJA Z 1.529 False 0.0 0 0.0 13 Z False False
33 Jesus ZANAHORIABAJA Z -1.138 False 0.0 0 0.0 13 Z False False
68 Jesus PQINZLFGKWQUVYVNREUS PQINZ 0.332 False 0.0 0 0.0 20 Z False False
117 Jesus MEDESCORAZONARE MEDESCORAZ 0.467 False 0.0 0 0.0 15 Z False False
141 Jesus QWMNZPBXJJBXBJX QWMNZ 0.359 False 0.0 0 0.0 15 Z False False
158 Jesus QWMNZPBXJJBXBJX QWMNZ 0.788 False 0.0 0 0.0 15 Z False False
171 Jesus PZKOFTLILILILI PZ 0.389 False 0.0 0 0.0 14 Z False False
205 Jesus ZIZIZIHIZIZHIHIHI Z -1.129 False 0.0 0 0.0 17 Z False False
207 Jesus ZIZIZIHIZIZHIHIHI ZIZ 0.448 False 0.0 0 0.0 17 Z False False
209 Jesus ZIZIZIHIZIZHIHIHI ZIZIZ 0.450 False 0.0 0 0.0 17 Z False False
213 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZ 0.319 False 0.0 0 0.0 17 Z False False
215 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZIZ 0.375 False 0.0 0 0.0 17 Z False False
357 Jesus ZANAHORIABAJA Z 0.868 False 0.0 0 0.0 13 Z False False
392 Jesus PQINZLFGKWQUVYVNREUS PQINZ 0.346 False 0.0 0 0.0 20 Z False False
435 Jesus MEDESCORAZONARE MEDESCORAZ 0.286 False 0.0 0 0.0 15 Z False False
459 Jesus QWMNZPBXJJBXBJX QWMNZ 0.794 False 0.0 0 0.0 15 Z False False
472 Jesus PZKOFTLILILILI PZ 1.234 False 0.0 0 0.0 14 Z False False
504 Jesus ZIZIZIHIZIZHIHIHI Z 0.704 False 0.0 0 0.0 17 Z False False
506 Jesus ZIZIZIHIZIZHIHIHI ZIZ 0.474 False 0.0 0 0.0 17 Z False False
508 Jesus ZIZIZIHIZIZHIHIHI ZIZIZ 0.361 False 0.0 0 0.0 17 Z False False
512 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZ 0.403 False 0.0 0 0.0 17 Z False False
514 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZIZ 0.687 False 0.0 0 0.0 17 Z False False
672 Jesus ZANAHORIABAJA Z 0.847 False 0.0 0 0.0 13 Z False False
708 Jesus PQINZLFGKWQUVYVNREUS PQINZ 0.395 False 0.0 0 0.0 20 Z False False
751 Jesus MEDESCORAZONARE MEDESCORAZ 0.417 False 0.0 0 0.0 15 Z False False
775 Jesus QWMNZPBXJJBXBJX QWMNZ 0.317 False 0.0 0 0.0 15 Z False False
788 Jesus PZKOFTLILILILI PZ 0.986 False 0.0 0 0.0 14 Z False False
821 Jesus ZIZIZIHIZIZHIHIHI Z 0.595 False 0.0 0 0.0 17 Z False False
823 Jesus ZIZIZIHIZIZHIHIHI ZIZ 0.361 False 0.0 0 0.0 17 Z False False
... ... ... ... ... ... ... ... ... ... ... ... ...
1035 Jesus PQINZLFGKWQUVYVNREUS PQINZ 1.195 False 0.0 0 0.0 20 Z False False
1081 Jesus MEDESCORAZONARE MEDESCORAZ 0.420 False 0.0 0 0.0 15 Z False False
1105 Jesus QWMNZPBXJJBXBJX QWMNZ 0.273 False 0.0 0 0.0 15 Z False False
1118 Jesus PZKOFTLILILILI PZ 0.702 False 0.0 0 0.0 14 Z False False
1150 Jesus ZIZIZIHIZIZHIHIHI Z -0.078 False 0.0 0 0.0 17 Z False False
1152 Jesus ZIZIZIHIZIZHIHIHI ZIZ 0.527 False 0.0 0 0.0 17 Z False False
1154 Jesus ZIZIZIHIZIZHIHIHI ZIZIZ 0.308 False 0.0 0 0.0 17 Z False False
1158 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZ 0.309 False 0.0 0 0.0 17 Z False False
1160 Jesus ZIZIZIHIZIZHIHIHI ZIZIZIHIZIZ 0.395 False 0.0 0 0.0 17 Z False False
1302 Cristhian ZANAHORIABAJA Z -0.067 False 0.0 0 0.0 13 Z False False
1337 Cristhian PQINZLFGKWQUVYVNREUS PQINZ 0.259 False 0.0 0 0.0 20 Z False False
1381 Cristhian MEDESCORAZONARE MEDESCORAZ 0.357 False 0.0 0 0.0 15 Z False False
1405 Cristhian QWMNZPBXJJBXBJX QWMNZ 0.458 False 0.0 0 0.0 15 Z False False
1418 Cristhian PZKOFTLILILILI PZ 0.318 False 0.0 0 0.0 14 Z False False
1432 Cristhian PZKOFTLILILILI PZ 0.456 False 0.0 0 0.0 14 Z False False
1464 Cristhian ZIZIZIHIZIZHIHIHI Z -0.114 False 0.0 0 0.0 17 Z False False
1466 Cristhian ZIZIZIHIZIZHIHIHI ZIZ 0.399 False 0.0 0 0.0 17 Z False False
1468 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZ 0.320 False 0.0 0 0.0 17 Z False False
1472 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZIHIZ 0.420 False 0.0 0 0.0 17 Z False False
1474 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZIHIZIZ 0.330 False 0.0 0 0.0 17 Z False False
1655 Cristhian ZANAHORIABAJA Z -0.240 False 0.0 0 0.0 13 Z False False
1690 Cristhian PQINZLFGKWQUVYVNREUS PQINZ 0.597 False 0.0 0 0.0 20 Z False False
1733 Cristhian MEDESCORAZONARE MEDESCORAZ 0.338 False 0.0 0 0.0 15 Z False False
1757 Cristhian QWMNZPBXJJBXBJX QWMNZ 1.031 False 0.0 0 0.0 15 Z False False
1771 Cristhian PZKOFTLILILILI PZ 0.269 False 0.0 0 0.0 14 Z False False
1803 Cristhian ZIZIZIHIZIZHIHIHI Z -0.211 False 0.0 0 0.0 17 Z False False
1805 Cristhian ZIZIZIHIZIZHIHIHI ZIZ 0.340 False 0.0 0 0.0 17 Z False False
1807 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZ 0.523 False 0.0 0 0.0 17 Z False False
1811 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZIHIZ 0.364 False 0.0 0 0.0 17 Z False False
1813 Cristhian ZIZIZIHIZIZHIHIHI ZIZIZIHIZIZ 0.402 False 0.0 0 0.0 17 Z False False

65 rows × 12 columns

Mapear los valores verdadero y falso a 1 y 0

hayErrPalabra, falloCaracter, palabraCorrecta


In [112]:
# Pasamos de boolean a un int, 1 para true y 0 para false
data["hayErrPalabra"] = data['hayErrPalabra'].map({False: 0, True: 1})
data["falloCaracter"] = data['falloCaracter'].map({False: 0, True: 1})
data["palabraCorrecta"] = data['palabraCorrecta'].map({False: 0, True: 1})

In [113]:
predict["hayErrPalabra"] = predict['hayErrPalabra'].map({False: 0, True: 1})
predict["falloCaracter"] = predict['falloCaracter'].map({False: 0, True: 1})
predict["palabraCorrecta"] = predict['palabraCorrecta'].map({False: 0, True: 1})

Quitarle los espacios en blanco al usuario


In [114]:
data["usuario"] = data["usuario"].str.strip()

In [115]:
predict["usuario"] = predict["usuario"].str.strip()

Mapear el usuario en un campo usuarioID


In [116]:
data["usuarioID"] = data['usuario'].map({"Cristhian": 0, "Jesus": 1})

In [117]:
predict["usuarioID"] = predict['usuario'].map({"Cristhian": 0, "Jesus": 1})

Dejar solo los caracteres comprendidos entre A y Z

Cuidado al hacer los tiempos de palabra, que se borran las filas que los contienen


In [118]:
data['caracter'] = data[data['caracter'].between('A', 'Z', inclusive=True)]['caracter']

In [119]:
predict['caracter'] = predict[predict['caracter'].between('A', 'Z', inclusive=True)]['caracter']

(Mirar si interesa hacer o no)

Mapear cada palabra a un numero para poder entrenar

Primero se crea un diccionar almacenando cada valor unico y luego se recorre cambiado los valores


In [334]:
d = {ni: indi for indi, ni in enumerate(set(data['palabra']))}
data['palabra'] = [d[ni] for ni in data['palabra']]

In [ ]:
d = {ni: indi for indi, ni in enumerate(set(predict['palabra']))}
predict['palabra'] = [d[ni] for ni in predict['palabra']]

(Mirar si interesa hacer o no)

Mapear cada caracter a un numero para poder entrenar

Primero se crea un diccionar almacenando cada valor unico y luego se recorre cambiado los valores


In [335]:
d = {ni: indi for indi, ni in enumerate(set(data['caracter']))}
data['caracter'] = [d[ni] for ni in data['caracter']]

In [ ]:
d = {ni: indi for indi, ni in enumerate(set(predict['caracter']))}
predict['caracter'] = [d[ni] for ni in predict['caracter']]

Sacar tiempo medio de escritura del mismo caracter

Hay que quitar los caracteres nulos


In [121]:
caracter = data[~data['caracter'].isnull()][['usuario', 'caracter','tiempoCaracter','falloCaracter']]
caracter['user'] = data['usuarioID']
caracter = caracter.groupby(['usuario','caracter']).mean()
targerCaracter = caracter['user']
caracter = caracter.drop(['user'], axis=1)
#caracter.iloc[0:3]
caracter


Out[121]:
tiempoCaracter falloCaracter
usuario caracter
Cristhian A 1.728459 0.000000
B 1.783818 0.090909
C 0.296130 0.043478
D 0.502786 0.142857
E 0.375219 0.000000
F 1.002364 0.090909
G 0.585786 0.000000
H 0.665267 0.133333
I 0.654110 0.060976
J 1.355105 0.000000
K 0.796903 0.483871
L 0.677808 0.230769
M 0.435462 0.000000
N 0.424111 0.055556
O 0.466745 0.042553
P 0.257000 0.068966
Q 0.621400 0.000000
R 0.385037 0.000000
S 0.462542 0.083333
T 0.108727 0.090909
U 0.592500 0.000000
V 1.362300 0.000000
W 0.758063 0.125000
X 0.949000 0.000000
Y 1.036917 0.000000
Z 0.393727 0.000000
Jesus A 0.853449 0.038462
B 1.058324 0.108108
C 0.392875 0.020833
D 0.542654 0.038462
E 0.483250 0.046875
F 0.887500 0.000000
G 0.957929 0.000000
H 0.756556 0.037037
I 0.527510 0.019355
J 0.897094 0.062500
K 1.127969 0.000000
L 0.644050 0.000000
M 0.429925 0.000000
N 0.519525 0.049505
O 0.434865 0.022472
P 0.334519 0.018519
Q 0.438087 0.043478
R 0.515750 0.050000
S 0.428951 0.024390
T 0.376650 0.100000
U 0.815900 0.066667
V 1.048350 0.000000
W 0.670552 0.000000
X 0.649100 0.200000
Y 0.926000 0.000000
Z 1.020186 0.000000

In [122]:
caracterPred = predict[~predict['caracter'].isnull()][['usuario', 'caracter','tiempoCaracter','falloCaracter']]
caracterPred['user'] = predict['usuarioID']
caracterPred = caracterPred.groupby(['usuario','caracter']).mean()
targerCaracterPred = caracterPred['user']
caracterPred = caracterPred.drop(['user'], axis=1)
#caracterPred.iloc[0:3]
caracterPred


Out[122]:
tiempoCaracter falloCaracter
usuario caracter
Cristhian A 0.569095 0.142857
B 1.009500 0.000000
C 0.389308 0.000000
D 0.337556 0.111111
E 0.503583 0.208333
F 1.528000 0.000000
G 0.855667 0.222222
H 0.398167 0.000000
I 1.026125 0.075000
J 1.131125 0.000000
K 1.040750 0.000000
L 0.381273 0.090909
M 1.865400 0.000000
N 0.475462 0.115385
O 0.483167 0.083333
P 0.124071 0.071429
Q 0.861000 0.000000
R 0.380778 0.166667
S 0.392167 0.000000
T 0.138250 0.000000
U 0.919100 0.200000
V 0.964833 0.166667
W 0.586875 0.125000
X 1.080750 0.000000
Y 0.816167 0.000000
Z 0.685667 0.083333
Jesus A 0.307900 0.050000
B 0.946875 0.000000
C -0.082455 0.000000
D 0.432500 0.000000
E -0.006687 0.062500
F 0.901500 0.166667
G 1.135857 0.000000
H 0.610143 0.000000
I 0.500436 0.000000
J 0.780714 0.000000
K 0.966750 0.000000
L 0.502700 0.000000
M 0.085286 0.071429
N 0.418423 0.076923
O 0.417227 0.000000
P -0.507231 0.000000
Q 0.061200 0.000000
R 0.273929 0.000000
S 0.424300 0.000000
T -0.041500 0.000000
U 0.681714 0.000000
V 0.683667 0.166667
W 0.476714 0.000000
X 0.430750 0.000000
Y 0.770167 0.000000
Z 0.048364 0.000000

Sacar tiempo medio de pulsado de enter (caracteres nulos)


In [342]:
Enter = data[data['caracter'].isnull()][['usuario','tiempoCaracter']]
Enter.columns = ['usuario', 'tiempoEnter']
Enter = Enter.groupby(['usuario']).mean()
Enter


Out[342]:
tiempoEnter
usuario
Cristhian 0.363036
Jesus 0.499133

Usuario, palabra, tiempo


In [343]:
usPalTiempo = data[data['caracter'].isnull()][['usuario', 'palabra', 'tiempoPalabra', 'tiempoErrPalabra','tamPalabra']]
usPalTiempo


Out[343]:
usuario palabra tiempoPalabra tiempoErrPalabra tamPalabra
9 Cristhian ZANAHORIA 7.342 0.000 9
20 Cristhian PANTALON 3.773 0.000 8
33 Cristhian PQINZLVNREUS 8.024 0.000 12
52 Cristhian MINERIA DE DATOS 0.000 0.000 16
72 Cristhian MINERIA DE DATOS 12.541 6.871 16
91 Cristhian CRISTHIANO RONALDO 5.548 0.000 18
104 Cristhian MIMIMIMIMIMI 4.488 0.000 12
111 Cristhian QWMNZP 5.207 0.000 6
126 Cristhian PZKOFTLILILILI 7.343 0.000 14
137 Cristhian PELIAGUDO 4.585 0.000 9
156 Cristhian ZIZIZIHIZIZHIHIHI 7.477 0.000 17
166 Cristhian DIMENSION 4.473 0.000 9
177 Cristhian CORRECCION 3.604 0.000 10
184 Cristhian PASION 2.673 0.000 6
192 Cristhian PRESION 2.145 0.000 7
203 Cristhian PRECISION 3.969 0.000 9
213 Cristhian PRIVACION 3.442 0.000 9
223 Cristhian TRADICION 2.862 0.000 9
231 Cristhian TENSION 2.353 0.000 7
244 Cristhian COMPOSICION 4.849 0.000 11
256 Cristhian COMPRENSION 0.000 0.000 11
268 Cristhian ZANAHORIA 7.503 0.000 9
277 Cristhian PANTALON 3.688 0.000 8
290 Cristhian PQINZLVNREUS 6.082 0.000 12
309 Cristhian MINERIA DE DATOS 6.487 0.000 16
330 Cristhian CRISTHIANO RONALDO 5.629 0.000 18
343 Cristhian MIMIMIMIMIMI 3.393 0.000 12
352 Cristhian QWMNZP 0.000 0.000 6
360 Cristhian QWMNZP 11.486 4.326 6
375 Cristhian PZKOFTLILILILI 7.526 0.000 14
... ... ... ... ... ...
1387 Cristhian DIMENSION 3.370 0.000 9
1398 Cristhian CORRECCION 3.888 0.000 10
1405 Cristhian PASION 3.174 0.000 6
1414 Cristhian PRESION 4.099 0.000 7
1424 Cristhian PRECISION 3.760 0.000 9
1434 Cristhian PRIVACION 3.431 0.000 9
1444 Cristhian TRADICION 2.937 0.000 9
1452 Cristhian TENSION 3.478 0.000 7
1466 Cristhian COMPOSICION 4.639 0.000 11
1478 Cristhian COMPRENSION 0.000 0.000 11
1490 Jesus ZANAHORIA 5.739 0.000 9
1499 Jesus PANTALON 3.099 0.000 8
1512 Jesus PQINZLVNREUS 7.595 0.000 12
1530 Jesus MINERIA DE DATOS 3.877 0.000 16
1549 Jesus CRISTHIANO RONALDO 5.689 0.000 18
1562 Jesus MIMIMIMIMIMI 4.239 0.000 12
1570 Jesus QWMNZP 4.751 0.000 6
1587 Jesus PZKOFTLILILILI 8.913 0.000 14
1597 Jesus PELIAGUDO 3.970 0.000 9
1615 Jesus ZIZIZIHIZIZHIHIHI 7.169 0.000 17
1624 Jesus DIMENSION 2.861 0.000 9
1636 Jesus CORRECCION 3.979 0.000 10
1643 Jesus PASION 2.318 0.000 6
1651 Jesus PRESION 2.764 0.000 7
1662 Jesus PRECISION 3.820 0.000 9
1672 Jesus PRIVACION 3.504 0.000 9
1683 Jesus TRADICION 4.408 0.000 9
1691 Jesus TENSION 2.618 0.000 7
1704 Jesus COMPOSICION 4.215 0.000 11
1716 Jesus COMPRENSION 0.000 0.000 11

144 rows × 5 columns


In [ ]:
usPalTiempoPred = predict[predict['caracter'].isnull()][['usuario', 'palabra', 'tiempoPalabra', 'tiempoErrPalabra','tamPalabra']]
usPalTiempoPred

Sacar la suma de fallos totales por palabra


In [370]:
falloCaracterPorPalabra = data.groupby(['usuario','palabra'])['falloCaracter'].sum()
falloCaracterPorPalabra


Out[370]:
usuario    palabra           
Cristhian  COMPOSICION            3
           COMPRENSION            7
           CORRECCION             0
           CRISTHIANO            38
           CRISTHIANO RONALDO     0
           DIMENSION              0
           MIMIMIMIMIMI           0
           MINERIA               57
           MINERIA DE DATOS       0
           PANTALON               2
           PASION                 0
           PELIAGUDO              3
           PQINZLVNREUS           0
           PRECISION              1
           PRESION                1
           PRIVACION              2
           PZKOFTLILILILI         0
           QWMNZP                 5
           TENSION                0
           TRADICION              3
           ZANAHORIA              2
           ZIZIZIHIZIZHIHIHI      1
Jesus      COMPOSICION            3
           COMPRENSION            1
           CORRECCION             1
           CRISTHIANO            27
           CRISTHIANO RONALDO     0
           DIMENSION              3
           MIMIMIMIMIMI           0
           MINERIA               27
           MINERIA DE DATOS       0
           PANTALON               0
           PASION                 2
           PELIAGUDO              0
           PQINZLVNREUS           1
           PRECISION              1
           PRESION                0
           PRIVACION              0
           PZKOFTLILILILI         2
           QWMNZP                 2
           TENSION                1
           TRADICION              1
           ZANAHORIA              4
           ZIZIZIHIZIZHIHIHI      0
Name: falloCaracter, dtype: int64

In [ ]:
falloCaracterPorPalabraPred = predict.groupby(['usuario','palabra'])['falloCaracter'].sum()
falloCaracterPorPalabraPred

Prueba tiempo correccion caracter


In [366]:
tiempoCoreccionCaracter = data[data['falloCaracter'] > 0].groupby(['usuario','palabra'])['tiempoCaracter'].sum()
tiempoCoreccionCaracter


Out[366]:
usuario    palabra          
Cristhian  COMPOSICION           1.041
           COMPRENSION           2.437
           CRISTHIANO           14.162
           MINERIA              23.449
           PANTALON              0.759
           PELIAGUDO             1.061
           PRECISION             0.197
           PRESION               0.271
           PRIVACION             0.686
           QWMNZP                3.318
           TRADICION            -0.130
           ZANAHORIA             0.845
           ZIZIZIHIZIZHIHIHI     0.314
Jesus      COMPOSICION           1.679
           COMPRENSION           0.453
           CORRECCION            0.507
           CRISTHIANO           13.505
           DIMENSION             1.662
           MINERIA              12.226
           PASION                1.831
           PQINZLVNREUS          0.594
           PRECISION             0.504
           PZKOFTLILILILI        1.536
           QWMNZP                1.175
           TENSION               0.352
           TRADICION             0.669
           ZANAHORIA             4.541
Name: tiempoCaracter, dtype: float64

Error en el entreno, hay un tiempo negativo, MIRAR


In [372]:
dataFallo = data[data['tiempoErrPalabra'] > 0]
dataFallo[dataFallo['palabra'] == "PZKOFTLILILILI"]


Out[372]:
usuario palabra palabraLeida tiempoCaracter hayErrPalabra tiempoErrPalabra numPalabra tiempoPalabra tamPalabra caracter falloCaracter palabraCorrecta usuarioID
1349 Cristhian PZKOFTLILILILI PZKOFTLILILILI 0.319 0 7.324 8 13.564 14 NaN 0 1 0

In [377]:



<pandas.core.indexing._LocIndexer object at 0x0AB411B0>

Sacar el tiempoPalabra medio de cada palabra del usuario para usarlo como modelo

TiempoErrPalabra no se si es muy util


In [345]:
tiempoMedioPalabra = usPalTiempo.drop(['tamPalabra'], axis=1)
tiempoMedioPalabra['user'] = data['usuarioID']
#usPalTiempo2['numPalabra'] = usPalTiempo['palabra']
tiempoMedioPalabra = tiempoMedioPalabra.groupby(['usuario','palabra']).mean()
tiempoMedioPalabra['falloCaracterPorPalabra'] = falloCaracterPorPalabra
targetTM = tiempoMedioPalabra['user']
tiempoMedioPalabra = tiempoMedioPalabra.drop(['user'], axis=1)
tiempoMedioPalabra


Out[345]:
tiempoPalabra tiempoErrPalabra falloCaracterPorPalabra
usuario palabra
Cristhian COMPOSICION 4.325250 0.0000 3
COMPRENSION 0.000000 0.0000 7
CORRECCION 3.596000 0.0000 0
CRISTHIANO RONALDO 5.775250 0.0000 0
DIMENSION 3.669500 0.0000 0
MIMIMIMIMIMI 4.500600 1.0824 0
MINERIA DE DATOS 5.634800 1.3742 0
PANTALON 3.632500 0.0000 2
PASION 2.704250 0.0000 0
PELIAGUDO 4.403000 0.0000 3
PQINZLVNREUS 6.851500 0.0000 0
PRECISION 3.544750 0.0000 1
PRESION 3.312250 0.0000 1
PRIVACION 4.236000 0.0000 2
PZKOFTLILILILI 7.012600 1.4648 0
QWMNZP 5.225600 0.8652 5
TENSION 2.808500 0.0000 0
TRADICION 3.533500 0.0000 3
ZANAHORIA 6.888000 0.0000 2
ZIZIZIHIZIZHIHIHI 6.641500 0.0000 1
Jesus COMPOSICION 4.637000 0.0000 3
COMPRENSION 0.000000 0.0000 1
CORRECCION 3.851667 0.0000 1
CRISTHIANO RONALDO 6.752333 0.0000 0
DIMENSION 4.034000 0.0000 3
MIMIMIMIMIMI 4.628333 0.0000 0
MINERIA DE DATOS 4.306000 0.0000 0
PANTALON 3.067000 0.0000 0
PASION 2.843667 0.0000 2
PELIAGUDO 4.388333 0.0000 0
PQINZLVNREUS 6.816333 0.0000 1
PRECISION 3.649667 0.0000 1
PRESION 2.748333 0.0000 0
PRIVACION 3.554000 0.0000 0
PZKOFTLILILILI 7.969333 0.0000 2
QWMNZP 4.603000 0.0000 2
TENSION 2.860000 0.0000 1
TRADICION 4.923667 0.0000 1
ZANAHORIA 7.215000 0.0000 4
ZIZIZIHIZIZHIHIHI 7.899000 0.0000 0

In [ ]:
tiempoMedioPalabraPred = usPalTiempoPred.drop(['tamPalabra'], axis=1)
tiempoMedioPalabraPred['user'] = predict['usuarioID']
#usPalTiempo2['numPalabra'] = usPalTiempo['palabra']
tiempoMedioPalabraPred = tiempoMedioPalabraPred.groupby(['usuario','palabra']).mean()
tiempoMedioPalabraPred['falloCaracterPorPalabra'] = falloCaracterPorPalabraPred
targetTM = tiempoMedioPalabraPred['user']
tiempoMedioPalabraPred = tiempoMedioPalabraPred.drop(['user'], axis=1)
tiempoMedioPalabraPred

Sacar tiempo medio por caracter por tamaño de palabra


In [226]:
usPalTiempo3 = usPalTiempo.drop(['palabra'], axis=1)
targetUS = usPalTiempo3['usuario']
usPalTiempo3 = usPalTiempo3.groupby(['usuario']).mean()
#usPalTiempo3['tiempoMedioCaracter'] = usPalTiempo3['tiempoPalabra'] / usPalTiempo3['tamPalabra']
usPalTiempo3


Out[226]:
tiempoPalabra tiempoErrPalabra tamPalabra
usuario
Cristhian 4.470893 0.284917 10.52381
Jesus 4.537333 0.000000 10.45000

In [67]:
usPalTiempo3['tiempoEnter'] = Enter
usPalTiempo3


Out[67]:
tiempoPalabra tamPalabra tiempoEnter
usuario
0 4.470893 10.52381 0.363036
1 4.537333 10.45000 0.499133

In [227]:
data


Out[227]:
usuario palabra palabraLeida tiempoCaracter hayErrPalabra tiempoErrPalabra numPalabra tiempoPalabra tamPalabra caracter falloCaracter palabraCorrecta usuarioID
0 Cristhian 3 Z 0.348 0 0.0 0 0.000 9 25 0 0 0
1 Cristhian 3 ZA 0.490 0 0.0 0 0.000 9 3 0 0 0
2 Cristhian 3 ZAN 0.702 0 0.0 0 0.000 9 15 0 0 0
3 Cristhian 3 ZANA 0.807 0 0.0 0 0.000 9 3 0 0 0
4 Cristhian 3 ZANAH 0.273 0 0.0 0 0.000 9 10 0 0 0
5 Cristhian 3 ZANAHO 0.276 0 0.0 0 0.000 9 14 0 0 0
6 Cristhian 3 ZANAHOR 0.352 0 0.0 0 0.000 9 19 0 0 0
7 Cristhian 3 ZANAHORI 0.321 0 0.0 0 0.000 9 9 0 0 0
8 Cristhian 3 ZANAHORIA 0.281 0 0.0 0 0.000 9 3 0 0 0
9 Cristhian 3 ZANAHORIA 0.369 0 0.0 1 7.342 9 0 0 0 0
10 Cristhian 17 P -0.818 0 0.0 0 0.000 8 17 0 0 0
11 Cristhian 17 PA -0.698 0 0.0 0 0.000 8 3 0 0 0
12 Cristhian 17 PAN 0.291 0 0.0 0 0.000 8 15 0 0 0
13 Cristhian 17 PANT 0.261 0 0.0 0 0.000 8 21 0 0 0
14 Cristhian 17 PANTA 0.237 0 0.0 0 0.000 8 3 0 0 0
15 Cristhian 17 PANTAL 0.319 0 0.0 0 0.000 8 13 0 0 0
16 Cristhian 17 PANTALO 0.378 0 0.0 0 0.000 8 14 0 0 0
17 Cristhian 17 PANTALOS 0.378 0 0.0 0 0.000 8 18 0 0 0
18 Cristhian 17 PANTALOSN 0.381 0 0.0 0 0.000 8 15 0 0 0
19 Cristhian 17 PANTALON 0.959 0 0.0 0 0.000 8 15 0 0 0
20 Cristhian 17 PANTALON 0.282 0 0.0 2 3.773 8 0 0 0 0
21 Cristhian 1 P -0.447 0 0.0 0 0.000 12 17 0 0 0
22 Cristhian 1 PQ 0.297 0 0.0 0 0.000 12 16 0 0 0
23 Cristhian 1 PQI 1.083 0 0.0 0 0.000 12 9 0 0 0
24 Cristhian 1 PQIN 1.237 0 0.0 0 0.000 12 15 0 0 0
25 Cristhian 1 PQINZ 0.300 0 0.0 0 0.000 12 25 0 0 0
26 Cristhian 1 PQINZL 0.453 0 0.0 0 0.000 12 13 0 0 0
27 Cristhian 1 PQINZLV 1.796 0 0.0 0 0.000 12 23 0 0 0
28 Cristhian 1 PQINZLVN 0.297 0 0.0 0 0.000 12 15 0 0 0
29 Cristhian 1 PQINZLVNR 0.991 0 0.0 0 0.000 12 19 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1687 Jesus 18 TENS 1.152 0 0.0 0 0.000 7 18 0 0 1
1688 Jesus 18 TENSI 0.382 0 0.0 0 0.000 7 9 0 0 1
1689 Jesus 18 TENSIO 0.346 0 0.0 0 0.000 7 14 0 0 1
1690 Jesus 18 TENSION 0.254 0 0.0 0 0.000 7 15 0 0 1
1691 Jesus 18 TENSION 0.487 0 0.0 18 2.618 7 0 0 0 1
1692 Jesus 12 C 0.514 0 0.0 0 0.000 11 4 0 0 1
1693 Jesus 12 CO 0.738 0 0.0 0 0.000 11 14 0 0 1
1694 Jesus 12 COM 0.408 0 0.0 0 0.000 11 12 0 0 1
1695 Jesus 12 COMP 0.382 0 0.0 0 0.000 11 17 0 0 1
1696 Jesus 12 COMPO 0.432 0 0.0 0 0.000 11 14 0 0 1
1697 Jesus 12 COMPOS 0.486 0 0.0 0 0.000 11 18 0 0 1
1698 Jesus 12 COMPOSI 0.786 0 0.0 0 0.000 11 9 0 0 1
1699 Jesus 12 COMPOSIC 0.612 0 0.0 0 0.000 11 4 0 0 1
1700 Jesus 12 COMPOSICI 0.737 0 0.0 0 0.000 11 9 0 0 1
1701 Jesus 12 COMPOSICIO 0.615 0 0.0 0 0.000 11 14 0 0 1
1702 Jesus 12 COMPOSICION 0.534 0 0.0 0 0.000 11 15 0 0 1
1703 Jesus 12 COMPOSICIONM 0.537 0 0.0 0 0.000 11 12 0 0 1
1704 Jesus 12 COMPOSICION 0.481 0 0.0 19 4.215 11 0 0 0 1
1705 Jesus 2 C 0.562 0 0.0 0 0.000 11 4 0 0 1
1706 Jesus 2 CO 0.955 0 0.0 0 0.000 11 14 0 0 1
1707 Jesus 2 COM 0.551 0 0.0 0 0.000 11 12 0 0 1
1708 Jesus 2 COMP 0.830 0 0.0 0 0.000 11 17 0 0 1
1709 Jesus 2 COMPR 0.605 0 0.0 0 0.000 11 19 0 0 1
1710 Jesus 2 COMPRE 0.643 0 0.0 0 0.000 11 5 0 0 1
1711 Jesus 2 COMPREN 0.614 0 0.0 0 0.000 11 15 0 0 1
1712 Jesus 2 COMPRENS 0.487 0 0.0 0 0.000 11 18 0 0 1
1713 Jesus 2 COMPRENSI 0.414 0 0.0 0 0.000 11 9 0 0 1
1714 Jesus 2 COMPRENSIO 0.281 0 0.0 0 0.000 11 14 0 0 1
1715 Jesus 2 COMPRENSION 0.383 0 0.0 0 0.000 11 15 0 0 1
1716 Jesus 2 COMPRENSION 0.492 0 0.0 19 0.000 11 0 0 0 1

1717 rows × 13 columns

Sacar el target


In [87]:
target = data['usuarioID']
target


Out[87]:
0       0
1       0
2       0
3       0
4       0
5       0
6       0
7       0
8       0
9       0
10      0
11      0
12      0
13      0
14      0
15      0
16      0
17      0
18      0
19      0
20      0
21      0
22      0
23      0
24      0
25      0
26      0
27      0
28      0
29      0
       ..
1687    1
1688    1
1689    1
1690    1
1691    1
1692    1
1693    1
1694    1
1695    1
1696    1
1697    1
1698    1
1699    1
1700    1
1701    1
1702    1
1703    1
1704    1
1705    1
1706    1
1707    1
1708    1
1709    1
1710    1
1711    1
1712    1
1713    1
1714    1
1715    1
1716    1
Name: usuarioID, dtype: int64

In [ ]:
targetPred = predict['usuarioID']
targetPred

Eliminar campos sobrantes (Usuario, palabra, palabraLeida, numPalabra, tamPalabra, caracter, usuarioID)


In [303]:
data = data.drop(['usuario','palabraLeida','numPalabra', 'tamPalabra','usuarioID'], axis=1)

In [ ]:
predict = predict.drop(['usuario','palabraLeida','numPalabra', 'tamPalabra','usuarioID'], axis=1)

In [304]:
#'palabra', (mirar estos) 'falloCaracter' 'palabraCorrecta', 'hayErrPalabra'
data


Out[304]:
palabra tiempoCaracter hayErrPalabra tiempoErrPalabra tiempoPalabra caracter falloCaracter palabraCorrecta
0 3 0.348 0 0.0 0.000 25 0 0
1 3 0.490 0 0.0 0.000 3 0 0
2 3 0.702 0 0.0 0.000 15 0 0
3 3 0.807 0 0.0 0.000 3 0 0
4 3 0.273 0 0.0 0.000 10 0 0
5 3 0.276 0 0.0 0.000 14 0 0
6 3 0.352 0 0.0 0.000 19 0 0
7 3 0.321 0 0.0 0.000 9 0 0
8 3 0.281 0 0.0 0.000 3 0 0
9 3 0.369 0 0.0 7.342 0 0 1
10 17 -0.818 0 0.0 0.000 17 0 0
11 17 -0.698 0 0.0 0.000 3 0 0
12 17 0.291 0 0.0 0.000 15 0 0
13 17 0.261 0 0.0 0.000 21 0 0
14 17 0.237 0 0.0 0.000 3 0 0
15 17 0.319 0 0.0 0.000 13 0 0
16 17 0.378 0 0.0 0.000 14 0 0
17 17 0.378 0 0.0 0.000 18 1 0
18 17 0.381 0 0.0 0.000 15 1 0
19 17 0.959 0 0.0 0.000 15 0 0
20 17 0.282 0 0.0 3.773 0 0 1
21 1 -0.447 0 0.0 0.000 17 0 0
22 1 0.297 0 0.0 0.000 16 0 0
23 1 1.083 0 0.0 0.000 9 0 0
24 1 1.237 0 0.0 0.000 15 0 0
25 1 0.300 0 0.0 0.000 25 0 0
26 1 0.453 0 0.0 0.000 13 0 0
27 1 1.796 0 0.0 0.000 23 0 0
28 1 0.297 0 0.0 0.000 15 0 0
29 1 0.991 0 0.0 0.000 19 0 0
... ... ... ... ... ... ... ... ...
1687 18 1.152 0 0.0 0.000 18 0 0
1688 18 0.382 0 0.0 0.000 9 0 0
1689 18 0.346 0 0.0 0.000 14 0 0
1690 18 0.254 0 0.0 0.000 15 0 0
1691 18 0.487 0 0.0 2.618 0 0 1
1692 12 0.514 0 0.0 0.000 4 0 0
1693 12 0.738 0 0.0 0.000 14 0 0
1694 12 0.408 0 0.0 0.000 12 0 0
1695 12 0.382 0 0.0 0.000 17 0 0
1696 12 0.432 0 0.0 0.000 14 0 0
1697 12 0.486 0 0.0 0.000 18 0 0
1698 12 0.786 0 0.0 0.000 9 0 0
1699 12 0.612 0 0.0 0.000 4 0 0
1700 12 0.737 0 0.0 0.000 9 0 0
1701 12 0.615 0 0.0 0.000 14 0 0
1702 12 0.534 0 0.0 0.000 15 0 0
1703 12 0.537 0 0.0 0.000 12 1 0
1704 12 0.481 0 0.0 4.215 0 0 1
1705 2 0.562 0 0.0 0.000 4 0 0
1706 2 0.955 0 0.0 0.000 14 0 0
1707 2 0.551 0 0.0 0.000 12 0 0
1708 2 0.830 0 0.0 0.000 17 0 0
1709 2 0.605 0 0.0 0.000 19 0 0
1710 2 0.643 0 0.0 0.000 5 0 0
1711 2 0.614 0 0.0 0.000 15 0 0
1712 2 0.487 0 0.0 0.000 18 0 0
1713 2 0.414 0 0.0 0.000 9 0 0
1714 2 0.281 0 0.0 0.000 14 0 0
1715 2 0.383 0 0.0 0.000 15 0 0
1716 2 0.492 0 0.0 0.000 0 0 0

1717 rows × 8 columns

Cambiar datos malos por las mejoras


In [211]:
tiempoPorPalabra = data[data['tiempoErrPalabra'] > 0][['palabra','tiempoPalabra', 'tiempoErrPalabra', 'palabraCorrecta']]

tiempoPorPalabra


Out[211]:
palabra tiempoPalabra tiempoErrPalabra palabraCorrecta
72 9 12.541 6.871 0
360 10 11.486 4.326 0
1314 5 10.362 5.412 0
1349 14 13.564 7.324 0

In [161]:
#data['tiempoPalabra'] = [tiempoPorPalabra['tiempoPalabra'] for tiempoPorPalabra['tiempoPalabra'] in data['tiempoPalabra']]
data2 = data.copy()

In [163]:
data2 = data2.drop(['tiempoPalabra', 'tiempoErrPalabra'], axis=1)

In [144]:
#data2["tiempoPalabra"] = data2["palabra"].map(tiempoPorPalabra)

In [165]:
data2


Out[165]:
palabra tiempoCaracter caracter
0 3 0.348 25
1 3 0.490 3
2 3 0.702 15
3 3 0.807 3
4 3 0.273 10
5 3 0.276 14
6 3 0.352 19
7 3 0.321 9
8 3 0.281 3
9 3 0.369 0
10 17 -0.818 17
11 17 -0.698 3
12 17 0.291 15
13 17 0.261 21
14 17 0.237 3
15 17 0.319 13
16 17 0.378 14
17 17 0.378 18
18 17 0.381 15
19 17 0.959 15
20 17 0.282 0
21 1 -0.447 17
22 1 0.297 16
23 1 1.083 9
24 1 1.237 15
25 1 0.300 25
26 1 0.453 13
27 1 1.796 23
28 1 0.297 15
29 1 0.991 19
... ... ... ...
1687 18 1.152 18
1688 18 0.382 9
1689 18 0.346 14
1690 18 0.254 15
1691 18 0.487 0
1692 12 0.514 4
1693 12 0.738 14
1694 12 0.408 12
1695 12 0.382 17
1696 12 0.432 14
1697 12 0.486 18
1698 12 0.786 9
1699 12 0.612 4
1700 12 0.737 9
1701 12 0.615 14
1702 12 0.534 15
1703 12 0.537 12
1704 12 0.481 0
1705 2 0.562 4
1706 2 0.955 14
1707 2 0.551 12
1708 2 0.830 17
1709 2 0.605 19
1710 2 0.643 5
1711 2 0.614 15
1712 2 0.487 18
1713 2 0.414 9
1714 2 0.281 14
1715 2 0.383 15
1716 2 0.492 0

1717 rows × 3 columns


In [164]:
data


Out[164]:
palabra tiempoCaracter tiempoErrPalabra tiempoPalabra caracter
0 3 0.348 0.0 0.000 25
1 3 0.490 0.0 0.000 3
2 3 0.702 0.0 0.000 15
3 3 0.807 0.0 0.000 3
4 3 0.273 0.0 0.000 10
5 3 0.276 0.0 0.000 14
6 3 0.352 0.0 0.000 19
7 3 0.321 0.0 0.000 9
8 3 0.281 0.0 0.000 3
9 3 0.369 0.0 7.342 0
10 17 -0.818 0.0 0.000 17
11 17 -0.698 0.0 0.000 3
12 17 0.291 0.0 0.000 15
13 17 0.261 0.0 0.000 21
14 17 0.237 0.0 0.000 3
15 17 0.319 0.0 0.000 13
16 17 0.378 0.0 0.000 14
17 17 0.378 0.0 0.000 18
18 17 0.381 0.0 0.000 15
19 17 0.959 0.0 0.000 15
20 17 0.282 0.0 3.773 0
21 1 -0.447 0.0 0.000 17
22 1 0.297 0.0 0.000 16
23 1 1.083 0.0 0.000 9
24 1 1.237 0.0 0.000 15
25 1 0.300 0.0 0.000 25
26 1 0.453 0.0 0.000 13
27 1 1.796 0.0 0.000 23
28 1 0.297 0.0 0.000 15
29 1 0.991 0.0 0.000 19
... ... ... ... ... ...
1687 18 1.152 0.0 0.000 18
1688 18 0.382 0.0 0.000 9
1689 18 0.346 0.0 0.000 14
1690 18 0.254 0.0 0.000 15
1691 18 0.487 0.0 2.618 0
1692 12 0.514 0.0 0.000 4
1693 12 0.738 0.0 0.000 14
1694 12 0.408 0.0 0.000 12
1695 12 0.382 0.0 0.000 17
1696 12 0.432 0.0 0.000 14
1697 12 0.486 0.0 0.000 18
1698 12 0.786 0.0 0.000 9
1699 12 0.612 0.0 0.000 4
1700 12 0.737 0.0 0.000 9
1701 12 0.615 0.0 0.000 14
1702 12 0.534 0.0 0.000 15
1703 12 0.537 0.0 0.000 12
1704 12 0.481 0.0 4.215 0
1705 2 0.562 0.0 0.000 4
1706 2 0.955 0.0 0.000 14
1707 2 0.551 0.0 0.000 12
1708 2 0.830 0.0 0.000 17
1709 2 0.605 0.0 0.000 19
1710 2 0.643 0.0 0.000 5
1711 2 0.614 0.0 0.000 15
1712 2 0.487 0.0 0.000 18
1713 2 0.414 0.0 0.000 9
1714 2 0.281 0.0 0.000 14
1715 2 0.383 0.0 0.000 15
1716 2 0.492 0.0 0.000 0

1717 rows × 5 columns

Separar datos de entreno y datos de testeo

Cross Validation

Random Forest


In [305]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(n_estimators=101)
scores = cross_val_score(random_forest, data, target, cv=5)
print(scores)
print(scores.mean())


[ 0.61627907  0.6627907   0.62973761  0.68804665  0.66472303]
0.652315411214

SVM


In [306]:
from sklearn.model_selection import cross_val_score
from sklearn import svm

svm = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(svm, data, target, cv=5)
print(scores)
print(scores.mean())


[ 0.66569767  0.61918605  0.63556851  0.67638484  0.67055394]
0.653478201912

AdaBoost

datos originales


In [123]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

ada = AdaBoostClassifier(n_estimators=100)

scores = cross_val_score(ada, data, target, cv=5)
print(scores)
print(scores.mean())


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-123-b029c92b3316> in <module>()
      4 ada = AdaBoostClassifier(n_estimators=100)
      5 
----> 6 scores = cross_val_score(ada, data, target, cv=5)
      7 print(scores)
      8 print(scores.mean())

NameError: name 'target' is not defined

Pruebas con otro modelo


In [124]:
scores = cross_val_score(ada, tiempoMedioPalabra, targetTM, cv=5)
print(scores)
print(scores.mean())


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-124-e364c7f111a5> in <module>()
----> 1 scores = cross_val_score(ada, tiempoMedioPalabra, targetTM, cv=5)
      2 print(scores)
      3 print(scores.mean())

NameError: name 'tiempoMedioPalabra' is not defined

Pruebas con modelo tiempo medio por caracter


In [125]:
scores = cross_val_score(ada, caracter, targerCaracter, cv=5)
print(scores)
print(scores.mean())


[ 0.75  0.8   0.5   0.5   0.6 ]
0.63

Entreno del modelo caracter, con los datos sin el target


In [126]:
# no se si estaria bien asi ya que caracter tiene el usuario
ada.fit(caracter,targerCaracter)


Out[126]:
AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=100, random_state=None)

Prediccion modelo sin Cross Validation


In [133]:
pred = ada.predict(caracterPred)
pred


Out[133]:
array([ 0.,  1.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  1.,
        0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  0.])

In [134]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(targerCaracterPred, pred)
print(accuracy)


0.442307692308

In [128]:
score = ada.score(caracter, caracterPred)
score


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-128-f675035d796f> in <module>()
----> 1 score = ada.score(caracter, caracterPred)
      2 score

C:\Users\Jesus\Anaconda2\lib\site-packages\sklearn\base.pyc in score(self, X, y, sample_weight)
    347         """
    348         from .metrics import accuracy_score
--> 349         return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
    350 
    351 

C:\Users\Jesus\Anaconda2\lib\site-packages\sklearn\metrics\classification.pyc in accuracy_score(y_true, y_pred, normalize, sample_weight)
    170 
    171     # Compute accuracy for each possible representation
--> 172     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    173     if y_type.startswith('multilabel'):
    174         differing_labels = count_nonzero(y_true - y_pred, axis=1)

C:\Users\Jesus\Anaconda2\lib\site-packages\sklearn\metrics\classification.pyc in _check_targets(y_true, y_pred)
     80     if len(y_type) > 1:
     81         raise ValueError("Can't handle mix of {0} and {1}"
---> 82                          "".format(type_true, type_pred))
     83 
     84     # We can't have more than one value on y_type => The set is no more needed

ValueError: Can't handle mix of continuous-multioutput and binary

In [130]:
caracter.describe()


Out[130]:
tiempoCaracter falloCaracter
count 52.000000 52.000000
mean 0.700285 0.051732
std 0.350833 0.081010
min 0.108727 0.000000
25% 0.435312 0.000000
50% 0.632725 0.030714
75% 0.904320 0.067241
max 1.783818 0.483871

In [131]:
caracterPred


Out[131]:
tiempoCaracter falloCaracter
usuario caracter
Cristhian A 0.569095 0.142857
B 1.009500 0.000000
C 0.389308 0.000000
D 0.337556 0.111111
E 0.503583 0.208333
F 1.528000 0.000000
G 0.855667 0.222222
H 0.398167 0.000000
I 1.026125 0.075000
J 1.131125 0.000000
K 1.040750 0.000000
L 0.381273 0.090909
M 1.865400 0.000000
N 0.475462 0.115385
O 0.483167 0.083333
P 0.124071 0.071429
Q 0.861000 0.000000
R 0.380778 0.166667
S 0.392167 0.000000
T 0.138250 0.000000
U 0.919100 0.200000
V 0.964833 0.166667
W 0.586875 0.125000
X 1.080750 0.000000
Y 0.816167 0.000000
Z 0.685667 0.083333
Jesus A 0.307900 0.050000
B 0.946875 0.000000
C -0.082455 0.000000
D 0.432500 0.000000
E -0.006687 0.062500
F 0.901500 0.166667
G 1.135857 0.000000
H 0.610143 0.000000
I 0.500436 0.000000
J 0.780714 0.000000
K 0.966750 0.000000
L 0.502700 0.000000
M 0.085286 0.071429
N 0.418423 0.076923
O 0.417227 0.000000
P -0.507231 0.000000
Q 0.061200 0.000000
R 0.273929 0.000000
S 0.424300 0.000000
T -0.041500 0.000000
U 0.681714 0.000000
V 0.683667 0.166667
W 0.476714 0.000000
X 0.430750 0.000000
Y 0.770167 0.000000
Z 0.048364 0.000000

In [132]:
caracter


Out[132]:
tiempoCaracter falloCaracter
usuario caracter
Cristhian A 1.728459 0.000000
B 1.783818 0.090909
C 0.296130 0.043478
D 0.502786 0.142857
E 0.375219 0.000000
F 1.002364 0.090909
G 0.585786 0.000000
H 0.665267 0.133333
I 0.654110 0.060976
J 1.355105 0.000000
K 0.796903 0.483871
L 0.677808 0.230769
M 0.435462 0.000000
N 0.424111 0.055556
O 0.466745 0.042553
P 0.257000 0.068966
Q 0.621400 0.000000
R 0.385037 0.000000
S 0.462542 0.083333
T 0.108727 0.090909
U 0.592500 0.000000
V 1.362300 0.000000
W 0.758063 0.125000
X 0.949000 0.000000
Y 1.036917 0.000000
Z 0.393727 0.000000
Jesus A 0.853449 0.038462
B 1.058324 0.108108
C 0.392875 0.020833
D 0.542654 0.038462
E 0.483250 0.046875
F 0.887500 0.000000
G 0.957929 0.000000
H 0.756556 0.037037
I 0.527510 0.019355
J 0.897094 0.062500
K 1.127969 0.000000
L 0.644050 0.000000
M 0.429925 0.000000
N 0.519525 0.049505
O 0.434865 0.022472
P 0.334519 0.018519
Q 0.438087 0.043478
R 0.515750 0.050000
S 0.428951 0.024390
T 0.376650 0.100000
U 0.815900 0.066667
V 1.048350 0.000000
W 0.670552 0.000000
X 0.649100 0.200000
Y 0.926000 0.000000
Z 1.020186 0.000000